import pandas as pd
from pathlib import Path

# 1. 配置输入、输出
input_file = "Both Data.xlsx"
output_file = "Monthly_Counts_Quad&Event.xlsx"
valid_event_codes = [str(i).zfill(2) for i in range(1,21)]
quad_classes = [1,2,3,4]

# 2. 读取并预处理
df = pd.read_excel(input_file,
                   usecols=['MonthYear','QuadClass','EventRootCode','Actor1CountryCode','Actor2CountryCode'],
                   engine="openpyxl")
# 转 MonthYear 为 Period，再转 Timestamp（每月第一天）
df['Month'] = (pd.to_datetime(df['MonthYear'].astype(str), format='%Y%m', errors='coerce')
                  .dt.to_period('M').dt.to_timestamp())
# 规范 EventRootCode
df['EventRootCode'] = (df['EventRootCode'].astype(str)
                          .str.lstrip('0').str.zfill(2))
df = df[df['EventRootCode'].isin(valid_event_codes)]

# 3. 统计 QuadClass 月度频次
months = pd.date_range(df['Month'].min(), df['Month'].max(), freq='MS')
idx_q = pd.MultiIndex.from_product([months, quad_classes], names=['Month','QuadClass'])
quad_counts = (df.groupby(['Month','QuadClass']).size()
                 .reindex(idx_q, fill_value=0)
                 .unstack('QuadClass'))

# 4. 按 CT/TC 分组统计 EventRootCode 月度频次
mask_ct = (df['Actor1CountryCode']=='CHN') & (df['Actor2CountryCode']=='TWN')
mask_tc = (df['Actor1CountryCode']=='TWN') & (df['Actor2CountryCode']=='CHN')

def event_counts(subdf):
    idx_e = pd.MultiIndex.from_product([months, valid_event_codes],
                                       names=['Month','EventRootCode'])
    return (subdf.groupby(['Month','EventRootCode']).size()
                .reindex(idx_e, fill_value=0)
                .unstack('EventRootCode'))

counts_ct = event_counts(df[mask_ct])
counts_tc = event_counts(df[mask_tc])
# 总表：不区分方向，直接相加
counts_total = counts_ct.add(counts_tc, fill_value=0).astype(int)

# 5. 写入同一个 Excel 的四个 sheet
with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
    quad_counts.to_excel(writer, sheet_name='QuadClass')
    counts_ct.to_excel(writer, sheet_name='EventRootCode_CHN_TWN')
    counts_tc.to_excel(writer, sheet_name='EventRootCode_TWN_CHN')
    counts_total.to_excel(writer, sheet_name='EventRootCode')

print(f"已生成：{output_file}，含 4 个工作表。")
